---
title: "Replication Code for 'The Role of Education in Shaping Political Participation
Among Young Voters'"
output: pdf_document
author: "Lucas Leemann, Tabea Palmtag, Mala Walz"
date: "2025-10-21"
---


```{r setup, include=FALSE}
# Global knitr defaults: hide the code and suppress warnings in the rendered PDF.
knitr::opts_chunk$set(echo = FALSE, warning = FALSE)
# Option names are case-sensitive: the previous spelling `stringsasFactors`
# was silently ignored by R.
options(stringsAsFactors = FALSE,
        tinytex.verbose = TRUE)
# Start from an empty workspace (all.names = TRUE also removes dot-prefixed objects).
rm(list = ls(all.names = TRUE))
# All figures and tables below are written to this folder; create it up front
# so the document also knits on a fresh checkout.
dir.create(file.path("Figures", "final"), recursive = TRUE, showWarnings = FALSE)
```

```{r}
# Attach every package used in this document via pacman
library(pacman)
pacman::p_load(
  readr, tidyverse, dplyr, tidyr, ggplot2, plm, modelsummary, lfe, sjPlot,
  viridisLite, cowplot, tinytex, stargazer, tinytable, vtable, knitr,
  rmarkdown, scales
)

# Read the merged analysis data set (path is relative to the working directory)
data <- readRDS("data_merged_19.11.2024.rds")
```


# Variable Recoding
```{r}
# schultyp_new: two-category education track. Vocational-school students who
# completed a vocational baccalaureate (BM == 1) are grouped together with the
# baccalaureate high-school students; vocational students without it
# (BM == 0) form the second category. All other school types stay NA.
# The column starts as an all-NA character vector instead of indexing into NULL.
data$schultyp_new <- NA_character_
data$schultyp_new[data$schultyp == "Berufsschule" & data$BM == 1] <- "High School (Baccalaureate) & Vocational School with Baccalaureate"
data$schultyp_new[data$schultyp == "Mittelschule"] <- "High School (Baccalaureate) & Vocational School with Baccalaureate"
data$schultyp_new[data$schultyp == "Berufsschule" & data$BM == 0] <- "Vocational School"
# Use "Vocational School" as the reference category in the regressions.
# relevel() only needs the factor and `ref`; the extra positional argument
# passed before was silently absorbed by `...`.
data$schultyp_new <- relevel(as.factor(data$schultyp_new), ref = "Vocational School")

# schultyp_M_BM_B: three-category education variable that keeps vocational
# students with a vocational baccalaureate (BM == 1) as their own group.
# Rows matching none of the conditions (including missing schultyp) become NA.
# Stored as an ordered factor.
data$schultyp_M_BM_B <- factor(
  dplyr::case_when(
    data$schultyp == "Mittelschule" ~ "High School (Baccalaureate)",
    data$schultyp == "Berufsschule" & data$BM == 1 ~ "Vocational School with Baccalaureate",
    data$schultyp == "Berufsschule" & data$BM == 0 ~ "Vocational School",
    TRUE ~ NA_character_
  ),
  levels = c("Vocational School", "Vocational School with Baccalaureate", "High School (Baccalaureate)"),
  ordered = TRUE
)


# schultyp_split: English labels for every school type, with vocational
# students holding a vocational baccalaureate (BM == 1) split off as their own
# category. Values without a translation end up as NA in the final factor.
schultyp_english <- c(
  "Heim- und Sonderschule" = "Special Needs School",
  "Volksschule" = "Elementary School",
  "Brückenangebot" = "Bridging Program",
  "Berufsschule" = "Vocational School",
  "Mittelschule" = "High School (Baccalaureate)",
  "Höhere Berufsbildung" = "Higher Vocational Education"
)
data$schultyp_split <- unname(schultyp_english[as.character(data$schultyp)])
# Override the generic vocational label for baccalaureate holders
data$schultyp_split[data$schultyp == "Berufsschule" & data$BM == 1] <- "Vocational School with Baccalaureate"
# Store as an ordered factor
data$schultyp_split <- factor(
  data$schultyp_split,
  levels = c(
    "Special Needs School",
    "Elementary School",
    "Bridging Program",
    "Vocational School",
    "Vocational School with Baccalaureate",
    "High School (Baccalaureate)",
    "Higher Vocational Education"
  ),
  ordered = TRUE
)



# migrationBB: "naturalized" when einbuergerung_jahr (presumably the year of
# naturalization) is recorded, "native" when it is missing.
data$migrationBB <- ifelse(is.na(data$einbuergerung_jahr), "native", "naturalized")
#class(data$migrationBB)
#names(data)
#table(data$schultyp, useNA = "always")
#table(data$gde_name, useNA = "always")
#length(unique(data$pers_id))



# Categorical covariates: convert to factor and set the reference level
data$schultyp <- relevel(as.factor(data$schultyp), ref = "Volksschule")
data$gde_name <- relevel(as.factor(data$gde_name), ref = "Wila")
data$zivilstand <- relevel(as.factor(data$zivilstand), ref = "ledig")
data$experiment_group <- relevel(as.factor(data$experiment_group), ref = "control")

# Person identifier as a factor (no specific reference level)
data$pers_id <- as.factor(data$pers_id)

# Saliency measure as a numeric variable
data$highest_value <- as.numeric(data$highest_value)

# Construct "did_first_vote": a person-level indicator, repeated on every row
# of that person, of whether they participated in the vote flagged as their
# first vote. The case_when() clauses are evaluated in order, each on the
# whole group, so the result is one value per person:
#   NA    - no row of the person has first_vote == TRUE
#   TRUE  - some row has first_vote == TRUE and participate == TRUE
#   FALSE - some row has first_vote == TRUE and participate == FALSE
# If no clause evaluates to TRUE (e.g. because of missing values) the result is NA.
data <- data %>%
  arrange(pers_id, datum_abstimmung) %>%  # Sort by person and date
  group_by(pers_id) %>%                   # Group by individual
  dplyr::mutate(
    did_first_vote = case_when(
      all(first_vote == FALSE) ~ NA,                           # If all first_vote are FALSE, set to NA
      any(first_vote == TRUE & participate == TRUE) ~ TRUE,    # If any first_vote == TRUE & participate == TRUE, set all rows to TRUE
      any(first_vote == TRUE & participate == FALSE) ~ FALSE   # If any first_vote == TRUE & participate == FALSE, set all rows to FALSE
    )
  ) %>%
  ungroup()  # Remove grouping


# Construct "firstAndsecond_vote": a person-level indicator, repeated on every
# row of that person, of whether they participated both in their first vote
# and in the vote on the following row. Rows are sorted by datum_abstimmung
# within person, so lead() looks at the next row in date order.
# Side effect: the helper column n_obs (rows per person) stays in `data`.
data <- data %>%
 arrange(pers_id, datum_abstimmung) %>%  # Sort by person and date
  group_by(pers_id) %>%                   # Group by individual
  dplyr::mutate(
    # Initialise with did_first_vote; only used for the all-NA check below
    firstAndsecond_vote = did_first_vote,
    # Number of rows observed for this person
    n_obs = n(),
    # Clauses are evaluated in order; the first one that is TRUE wins
    firstAndsecond_vote = case_when(
      n_obs == 1 ~ NA,                       # only one row: no following vote to compare with
      all(is.na(firstAndsecond_vote)) ~ NA,  # did_first_vote is NA for this person
      any(
        did_first_vote == TRUE &
        first_vote == TRUE &
        participate == TRUE &
          # next row of the same person; the last row has no successor and counts as FALSE
          dplyr::lead(participate, 1, default = FALSE) == TRUE
      ) ~ TRUE,  
      TRUE ~ FALSE                           
    )
  ) %>%
  ungroup()
#str(data)
```


# Figure 1
```{r}
# Mean participation rate and number of observations per school type
# (vocational baccalaureate holders shown separately)
participation_rate_by_education <- data %>%
  group_by(schultyp_split) %>%
  summarise(
    avg_participation_rate = mean(participate, na.rm = TRUE),
    count = n()
  )

# Bar chart of the participation rate by school type; rows without a school
# type are left out and each bar is labelled with its rate in percent
fig_participation_by_school <- participation_rate_by_education %>%
  filter(!is.na(schultyp_split)) %>%
  ggplot(aes(x = schultyp_split, y = avg_participation_rate)) +
  geom_col() +
  geom_text(
    aes(label = round(avg_participation_rate * 100, 2)),
    vjust = -0.5,
    size = 5
  ) +
  labs(title = "", x = "", y = "Average Participation Rate") +
  theme_bw() +
  scale_y_continuous(labels = scales::percent_format(scale = 100)) +
  scale_x_discrete(labels = label_wrap(22)) +
  theme(
    text = element_text(size = 18),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
fig_participation_by_school

ggsave(
  "part_per_schultyp_split_BW.png",
  plot = fig_participation_by_school,
  path = "Figures/final",
  width = 14,
  height = 8
)
```





```{r}

#data$schultyp_split_german <- as.character(data$schultyp)
#data$schultyp_split_german[data$schultyp == "Berufsschule" & data$BM == 1] <- "Berufsschule #mit BM"
## transform the Variable to a ordered factor
#data$schultyp_split_german <- factor(
#  data$schultyp_split_german,
#  levels = c("Heim- und Sonderschule", "Volksschule", "Brückenangebot", "Berufsschule", #"Berufsschule mit BM", "Mittelschule", "Höhere Berufsbildung"),
#  ordered = TRUE)


## Calculate average participation rate by education (split for BM)
#participation_rate_by_education <- data %>%
#  group_by(schultyp_split_german) %>%
#  summarise(avg_participation_rate = mean(participate, na.rm = TRUE),  # Calculate average participation rate
#            count = n())  # Count the number of observations

# Plot participation rate by education
#ggplot(participation_rate_by_education %>%
#         filter(!is.na(schultyp_split_german)),
#       aes(x = schultyp_split_german, y = avg_participation_rate)) +
#  geom_bar(stat = "identity", fill = "#3F1151") +  # Bar plot with participation rate
# geom_text(aes(label = paste0(round(avg_participation_rate * 100, 1), "%")), 
#            vjust = -0.5, size = 5) + 
#  labs(title = "",
#       x = "",
#       y = "Durchschnittliche Partizipationsrate") +
#  theme_light() +
#  scale_y_continuous(labels = scales::percent_format(scale = 100)) +
#  scale_x_discrete(labels = label_wrap(15)) +
#  theme(text = element_text(size = 18),
#        axis.text.x = element_text(angle = 45, hjust = 1))

#ggsave("part_per_schultyp_split_Olivia.png", plot = last_plot(),
#       path = "C:/Users/mawalz/Documents/MyData/Partizipation_Bildung/Figures/final", width= 14, height= 8)
```



# Figure 2
```{r}
# Mean participation rate per education category, split by whether the person
# participated in their first vote. Rows with a missing education category or
# a missing first-vote indicator are excluded.
participation_rate_by_education_did_firstvote <- data %>%
  filter(!is.na(schultyp_M_BM_B) & !is.na(did_first_vote)) %>%
  group_by(schultyp_M_BM_B, did_first_vote) %>%
  summarise(
    avg_participation_rate = mean(participate, na.rm = TRUE),  # average participation rate
    count = n(),                                               # observations per cell
    # Return an ungrouped tibble; this also avoids the "grouped output by"
    # message that summarise() would otherwise print into the report.
    .groups = "drop"
  )

# Grouped bar chart: one pair of bars per education category
fig_first_vote_by_education <- ggplot(
  participation_rate_by_education_did_firstvote,
  aes(x = schultyp_M_BM_B, y = avg_participation_rate, fill = did_first_vote)
) +
  geom_bar(stat = "identity", position = "dodge") +
  # Label every bar with its rate in percent; the dodge width matches the bars
  geom_text(aes(label = round(avg_participation_rate * 100, 2)),
            position = position_dodge(width = 0.9), vjust = -0.5, size = 5) +
  labs(title = "",
       x = "",
       y = "Average Participation Rate",
       fill = "Participated in First Vote?") +
  scale_fill_grey(labels = c("TRUE" = "Yes", "FALSE" = "No")) +
  scale_y_continuous(labels = scales::percent_format(scale = 100)) +
  scale_x_discrete(labels = label_wrap(22)) +
  theme_bw() +
  theme(text = element_text(size = 18),
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
fig_first_vote_by_education

ggsave("firstVote_x_educ3_updated_BW.png", plot = fig_first_vote_by_education,
       path = "Figures/final", width = 14, height = 8)
```

# Regression Models
## basic model
```{r}
# Baseline linear probability model: participation on the two-category
# education track plus individual and household controls.
m_1_b <- lm(participate ~ schultyp_new + age_at_vote + gender + migrationBB + zivilstand + adult_household_historic_turnout,
          data = data)


# Prediction grid: one row per education track, all other covariates fixed
new_data <- data.frame(
  schultyp_new = c("Vocational School", "High School (Baccalaureate) & Vocational School with Baccalaureate"),
  age_at_vote = mean(data$age_at_vote, na.rm = TRUE),    # held at the sample mean
  gender = "male",
  migrationBB = "native",
  zivilstand = "ledig",
  adult_household_historic_turnout = mean(data$adult_household_historic_turnout, na.rm = TRUE))  # sample mean

# Point predictions with confidence interval (predict.lm default level)
predictions <- predict(m_1_b, newdata = new_data, interval = "confidence")

# Attach fitted values and interval bounds to the grid
new_data$predicted <- predictions[, "fit"]
new_data$lower_ci <- predictions[, "lwr"]
new_data$upper_ci <- predictions[, "upr"]

# Fix the display order on the x-axis: vocational school first
new_data$schultyp_new <- factor(new_data$schultyp_new,
                                levels = c("Vocational School", "High School (Baccalaureate) & Vocational School with Baccalaureate"))

# Predicted participation per education track with confidence intervals.
# `linewidth` replaces `size` for the error bars: since ggplot2 3.4.0 `size`
# is deprecated for line widths.
P_m_1_b <- ggplot(new_data, aes(x = schultyp_new, y = predicted, color = schultyp_new)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci), width = 0.04, linewidth = 0.8) +
  labs(
    title = "",
    x = "",
    y = "Predicted Participation") +
  scale_color_manual(values = c("Vocational School" = "grey7", "High School (Baccalaureate) & Vocational School with Baccalaureate" = "grey70")) +
  scale_y_continuous(
    limits = c(0.27, 0.55),
    breaks = seq(0.3, 0.55, by = 0.05),
    labels = scales::percent_format(scale = 100)) +
  scale_x_discrete(labels = label_wrap(22)) +
  theme_bw() +
  theme(text = element_text(size = 18),
    plot.title = element_text(hjust = 0.5),
    legend.position = "none")
P_m_1_b
#ggsave("m_1_b_test.png", plot = last_plot(),
#       path = "C:/Users/mawalz/Documents/MyData/Partizipation_Bildung/Figures/final", width= 6, height= 6)

```



## interaction model: educationXgender
```{r}
# Interaction model: education track x gender, with the same controls as the
# baseline model.
m_2 <- lm(participate ~ schultyp_new*gender + age_at_vote + migrationBB + zivilstand + adult_household_historic_turnout,
          data = data)

# Prediction grid: every combination of education track and gender, all other
# covariates fixed
new_data_interaction <- expand.grid(
  schultyp_new = c("Vocational School", "High School (Baccalaureate) & Vocational School with Baccalaureate"),
  gender = c("male", "female"),
  age_at_vote = mean(data$age_at_vote, na.rm = TRUE),
  migrationBB = "native",
  zivilstand = "ledig",
  adult_household_historic_turnout = mean(data$adult_household_historic_turnout, na.rm = TRUE))

# Point predictions with confidence interval (predict.lm default level)
predictions_interaction <- predict(m_2, newdata = new_data_interaction, interval = "confidence")

# Attach fitted values and interval bounds to the grid
new_data_interaction$predicted <- predictions_interaction[, "fit"]
new_data_interaction$lower_ci <- predictions_interaction[, "lwr"]
new_data_interaction$upper_ci <- predictions_interaction[, "upr"]

# Colour encodes the education track, point shape encodes gender.
# `linewidth` replaces `size` for the error bars: since ggplot2 3.4.0 `size`
# is deprecated for line widths.
P_m_2 <- ggplot(new_data_interaction, aes(x = schultyp_new, y = predicted, color = schultyp_new, shape = gender)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci), width = 0.04, linewidth = 0.8) +
  labs(
    title = "",
    x = "",
    y = "") +
  scale_color_manual(values = c("Vocational School" = "grey7", "High School (Baccalaureate) & Vocational School with Baccalaureate" = "grey70")) +
  scale_shape_manual(values = c("male" = 16, "female" = 17)) +  # circle for male, triangle for female
  scale_y_continuous(
    limits = c(0.27, 0.55),
    breaks = seq(0.3, 0.55, by = 0.05),
    labels = scales::percent_format(scale = 100)) +
  scale_x_discrete(labels = label_wrap(22)) +
  theme_bw() +
  theme(
    text = element_text(size = 18),
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom") +
  guides(shape = guide_legend(title = "Gender:"), color = "none")
P_m_2

# Pull the bottom legend out of P_m_2 so it can be placed separately in the
# combined figure. The legend is selected by its gtable name instead of by
# position in the list of all guide boxes (previously `[[3]]`), which depends
# on the order in which ggplot2 lays the boxes out.
legend <- get_plot_component(P_m_2 + theme(legend.margin = margin(0, 0, 0, 0)), "guide-box-bottom")

# Combined figure: baseline model on the left, gender interaction on the
# right, shared legend strip underneath
fig_baseline_and_gender <- ggdraw() +
  draw_plot(P_m_1_b + theme(plot.margin = margin(5.5, 5.5, 40, 5.5, "points")), x = 0, y = .1, width = .5, height = .9) +
  draw_plot(P_m_2 + theme(legend.position = "none", plot.margin = margin(5.5, 5.5, 40, 5.5, "points")), x = 0.5, y = .1, width = .5, height = .9) +
  draw_plot(legend, x = 0, y = 0, width = 1.5, height = .25)
fig_baseline_and_gender

ggsave("new_1_BW.png", plot = fig_baseline_and_gender,
       path = "Figures/final", width = 14, height = 8)
```


## interaction model: educationXmigration
```{r}
# Interaction model: education track x migration background, with the same
# controls as the baseline model.
m_3 <- lm(participate ~ schultyp_new*migrationBB + age_at_vote + gender + zivilstand + adult_household_historic_turnout,
          data = data)

# Prediction grid: every combination of education track and migration
# background, all other covariates fixed
new_data_interaction <- expand.grid(
  schultyp_new = c("Vocational School", "High School (Baccalaureate) & Vocational School with Baccalaureate"),
  migrationBB = c("native", "naturalized"),
  gender = "male",
  age_at_vote = mean(data$age_at_vote, na.rm = TRUE),
  zivilstand = "ledig",
  adult_household_historic_turnout = mean(data$adult_household_historic_turnout, na.rm = TRUE))

# Point predictions with confidence interval (predict.lm default level)
predictions_interaction <- predict(m_3, newdata = new_data_interaction, interval = "confidence")

# Attach fitted values and interval bounds to the grid
new_data_interaction$predicted <- predictions_interaction[, "fit"]
new_data_interaction$lower_ci <- predictions_interaction[, "lwr"]
new_data_interaction$upper_ci <- predictions_interaction[, "upr"]

# Colour encodes the education track, point shape the migration background.
# Points and error bars are dodged slightly so the two groups do not overlap.
# `linewidth` replaces `size` for the error bars: since ggplot2 3.4.0 `size`
# is deprecated for line widths.
P_m_3 <- ggplot(new_data_interaction, aes(x = schultyp_new, y = predicted, color = schultyp_new, shape = migrationBB)) +
  geom_point(size = 4, position = position_dodge(width = 0.1)) +
  geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci), width = 0.04, linewidth = 0.8, position = position_dodge(width = 0.1)) +
  labs(
    title = "",
    x = "",
    y = "Predicted Participation") +
  scale_color_manual(values = c("Vocational School" = "grey7", "High School (Baccalaureate) & Vocational School with Baccalaureate" = "grey70")) +
  scale_shape_manual(values = c("native" = 16, "naturalized" = 17)) +
  scale_y_continuous(
    limits = c(0.08, 0.8),
    breaks = seq(0.1, 0.8, by = 0.1),
    labels = scales::percent_format(scale = 100)) +
  scale_x_discrete(labels = label_wrap(22)) +
  theme_bw() +
  theme(
    text = element_text(size = 18),
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom") +
  guides(color = "none", shape = guide_legend(title = "Migration Background:"))
P_m_3
```

## interaction model: educationXhousehold turnout
```{r}
# Interaction model: education track x categorical household voting history
# (low / mid / high), with individual controls.
m_4 <- lm(participate ~ schultyp_new*adult_household_historic_turnout_fct + gender + age_at_vote + migrationBB + zivilstand,
          data = data)

# Prediction grid: every combination of education track and household voting
# history, all other covariates fixed
new_data_interaction <- expand.grid(
  schultyp_new = c("Vocational School", "High School (Baccalaureate) & Vocational School with Baccalaureate"),
  adult_household_historic_turnout_fct = c("low", "mid", "high"),
  migrationBB = "native",
  gender = "male",
  age_at_vote = mean(data$age_at_vote, na.rm = TRUE),
  zivilstand = "ledig")

# Point predictions with confidence interval (predict.lm default level)
predictions_interaction <- predict(m_4, newdata = new_data_interaction, interval = "confidence")

# Attach fitted values and interval bounds to the grid
new_data_interaction$predicted <- predictions_interaction[, "fit"]
new_data_interaction$lower_ci <- predictions_interaction[, "lwr"]
new_data_interaction$upper_ci <- predictions_interaction[, "upr"]

# Ordered factor so the legend follows the order low < mid < high
new_data_interaction <- new_data_interaction %>%
  mutate(adult_household_historic_turnout_fct = factor(adult_household_historic_turnout_fct, ordered = TRUE))

# Colour encodes the education track, point shape the household voting history.
# `linewidth` replaces `size` for the error bars: since ggplot2 3.4.0 `size`
# is deprecated for line widths.
P_m_4 <- ggplot(new_data_interaction, aes(x = schultyp_new, y = predicted, color = schultyp_new, shape = adult_household_historic_turnout_fct)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci), width = 0.04, linewidth = 0.8) +
  labs(
    title = "",
    x = "",
    y = "") +
  scale_color_manual(values = c("Vocational School" = "grey7", "High School (Baccalaureate) & Vocational School with Baccalaureate" = "grey70")) +
  scale_shape_manual(values = c("low" = 16, "mid" = 17, "high" = 15)) +  # circle, triangle, square
  scale_y_continuous(
    limits = c(0.08, 0.8),
    breaks = seq(0.1, 0.8, by = 0.1),
    labels = scales::percent_format(scale = 100)) +
  scale_x_discrete(labels = label_wrap(22)) +
  theme_bw() +
  theme(
    text = element_text(size = 18),
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom") +
  guides(color = "none", shape = guide_legend(title = "Household Voting History:"))  # shape legend only
P_m_4

# Migration-background and household-history panels side by side
combined_plot <- plot_grid(P_m_3, P_m_4, ncol = 2)
combined_plot
ggsave("new_2_BW.png", plot = combined_plot,
       path = "Figures/final", width = 14, height = 8)
```


## interaction model: educationXsaliency
```{r}
# Interaction model: education track x saliency (highest_value), additionally
# controlling for mean_stimmbet_perGemeindeWE and the baseline covariates.
m_6 <- lm(participate ~ highest_value*schultyp_new + mean_stimmbet_perGemeindeWE + age_at_vote + gender + migrationBB + zivilstand + adult_household_historic_turnout,
          data = data)


# Prediction grid: every combination of education track and observed saliency
# value, all other covariates fixed. sort() orders the values and drops NA, so
# the grid contains no rows that cannot be predicted or plotted.
new_data_interaction <- expand.grid(
  schultyp_new = c("Vocational School", "High School (Baccalaureate) & Vocational School with Baccalaureate"),
  highest_value = sort(unique(data$highest_value)),
  mean_stimmbet_perGemeindeWE = mean(data$mean_stimmbet_perGemeindeWE, na.rm = TRUE),
  gender = "male",
  age_at_vote = mean(data$age_at_vote, na.rm = TRUE),
  migrationBB = "native",
  zivilstand = "ledig",
  adult_household_historic_turnout = mean(data$adult_household_historic_turnout, na.rm = TRUE))

# Point predictions with confidence interval (predict.lm default level)
predictions_interaction <- predict(m_6, newdata = new_data_interaction, interval = "confidence")

# Attach fitted values and interval bounds to the grid
new_data_interaction$predicted <- predictions_interaction[, "fit"]
new_data_interaction$lower_ci <- predictions_interaction[, "lwr"]
new_data_interaction$upper_ci <- predictions_interaction[, "upr"]

# One prediction line with a confidence ribbon per education track.
# `linewidth` replaces `size` for the line: since ggplot2 3.4.0 `size` is
# deprecated for line widths.
fig_saliency_by_education <- ggplot(new_data_interaction, aes(x = highest_value, y = predicted, color = schultyp_new, fill = schultyp_new)) +
  geom_line(linewidth = 1) +
  geom_ribbon(aes(ymin = lower_ci, ymax = upper_ci), alpha = 0.2, color = NA) +
  labs(
    title = "",
    x = "Number of Published Articles (saliency)",
    y = "Predicted Participation",
    color = "Education Track:",
    fill = "Education Track:" ) +
  scale_color_manual(values = c("Vocational School" = "grey7", "High School (Baccalaureate) & Vocational School with Baccalaureate" = "grey70")) +
  scale_fill_manual(values = c("Vocational School" = "grey7", "High School (Baccalaureate) & Vocational School with Baccalaureate" = "grey70")) +
  scale_y_continuous(
    limits = c(0.2, 0.6),
    breaks = seq(0.2, 0.6, by = 0.1),
    labels = scales::percent_format(scale = 100)) +
  theme_bw() +
  theme(
    text = element_text(size = 18),
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom")
fig_saliency_by_education

ggsave("saliency_x_educ_lines_BW.png", plot = fig_saliency_by_education,
       path = "Figures/final", width = 14, height = 8)
```



# Making Tables
## Summary Statistics
```{r}
# Build the summary-statistics data set in one pipeline: keep the analysis
# variables, translate the marital-status levels to English, give the columns
# readable names, and coerce three columns to the types used in the table.
data_sumstat <- data %>%
  dplyr::select(
    schultyp_new,
    schultyp_split,
    did_first_vote,
    gender,
    migrationBB,
    adult_household_historic_turnout_fct,
    highest_value,
    age_at_vote,
    zivilstand,
    adult_household_historic_turnout,
    mean_stimmbet_perGemeindeWE
  ) %>%
  dplyr::mutate(
    zivilstand = recode(
      zivilstand,
      ledig = "single",
      verwitwet = "widowed",
      geschieden = "divorced",
      verheiratet = "married"
    )
  ) %>%
  dplyr::rename(
    "Education-Track" = "schultyp_new",
    "School Type" = "schultyp_split",
    "Participated in First Vote" = "did_first_vote",
    "Gender" = "gender",
    "Migration Background" = "migrationBB",
    "Household Voting History (fct)" = "adult_household_historic_turnout_fct",
    "Saliency" = "highest_value",
    "Age" = "age_at_vote",
    "Marital Status" = "zivilstand",
    "Household Voting History (num)" = "adult_household_historic_turnout",
    "Turnout per Municipality" = "mean_stimmbet_perGemeindeWE"
  ) %>%
  dplyr::mutate(
    `Migration Background` = as.factor(`Migration Background`),
    Age = as.integer(Age),
    Saliency = as.integer(Saliency)
  )

# Show the table in the document, then write a LaTeX copy to disk
vtable::sumtable(data_sumstat)
vtable::sumtable(data_sumstat, out = "latex", file = "Figures/final/summarystat.tex")


```


## Regression Table
```{r}
#levels(data$schultyp_new)[levels(data$schultyp_new) == "Mittelschule & BM"] <- "Mittelschule_BM"
#
## transform the variable from an ordered factor to a factor for regressionoutput issues
#data$adult_household_historic_turnout_fct <- factor(as.character(data$adult_household_historic_turnout_fct))
#data$adult_household_historic_turnout_fct <- relevel(data$adult_household_historic_turnout_fct , ref = "low")
#
#m_1_b <- lm(participate ~ schultyp_new + age_at_vote + gender + migrationBB + zivilstand + adult_household_historic_turnout,
#          data = data)
#m_2 <- lm(participate ~ schultyp_new*gender + age_at_vote + migrationBB + zivilstand + adult_household_historic_turnout,
#          data = data)
#m_3 <- lm(participate ~ schultyp_new*migrationBB + age_at_vote + gender + zivilstand + adult_household_historic_turnout,
#          data = data)
#m_4 <- lm(participate ~ schultyp_new*adult_household_historic_turnout_fct + gender + age_at_vote + migrationBB + zivilstand,
#          data = data)
#m_6 <- lm(participate ~ highest_value*schultyp_new + mean_stimmbet_perGemeindeWE + age_at_vote + gender + migrationBB + zivilstand + adult_household_historic_turnout,
#          data = data)
#
#
##stargazer::stargazer(m_1_b, m_2, m_3, m_4, m_6,
##                     title = "Regression Results",
##                     align = TRUE,
##                     dep.var.labels = "Political Participation",
##                     no.space = TRUE,
##                     type = "text",
##                     covariate.labels = c("Mittelschule/BM (ref: Berufsschule)", "Mittelschule/BM and Female", "Mittelschule/BM and Naturalizes", "Mittelschule/BM and High #Household Turnout", "Mittelschule/BM and Middle Household Turnout", "Mittelschule/BM #and Saliency", "Female", "Naturalized", "High Household Turnout", "Middle Household #Turnout", "Saliency", "Age", "Marital Status: Divorced (ref: single)", "Marital Status: Married (ref: single)", "Household Turnout (numeric)", "Turnout per Municipality"),
##                     order = c("schultyp_new", "gender", "migrationBB", "adult_household_historic_turnout_fct", "highest_value", "age_at_vote", "zivilstand", #"adult_household_historic_turnout", "mean_stimmbet_perGemeindeWE"))
##
#stargazer::stargazer(m_1_b, m_2, m_3, m_4, m_6,
#                     title = "Regression Results",
#                     align = TRUE,
#                     dep.var.labels = "Political Participation",
#                     no.space = TRUE,
#                     type = "latex",
#                     covariate.labels = c("Mittelschule/BM (ref: Berufsschule)", "Mittelschule/BM and Female", "Mittelschule/BM and Naturalizes", "Mittelschule/BM and High Household Turnout", "Mittelschule/BM #and Middle Household Turnout", "Mittelschule/BM and #Saliency", "Female", "Naturalized", "High Household Turnout", "Middle Household Turnout", "Saliency", "Age", "Marital Status: Divorced (ref: single)", "Marital Status: Married (ref: single)", "Household Turnout (numeric)", "Turnout per Municipality"),
#                     order = c("schultyp_new", "gender", "migrationBB", "adult_household_historic_turnout_fct", "highest_value", "age_at_vote", "zivilstand", "adult_household_historic_turnout", "mean_stimmbet_perGemeindeWE"),
#                      out = "C:/Users/mawalz/Documents/MyData/Partizipation_Bildung/Figures/final/Regressiontable.tex")
```


## Regression Table 2.0
```{r}
# Rename the combined education level so that it no longer contains "&"
# (presumably to avoid problems in the LaTeX table output - TODO confirm).
levels(data$schultyp_new)[levels(data$schultyp_new) == "High School (Baccalaureate) & Vocational School with Baccalaureate"] <- "High School (Baccalaureate)_Vocational School with Baccalaureate"

# Turn the ordered household-turnout factor into a plain factor with "low" as
# the reference level, for regression-output reasons.
data$adult_household_historic_turnout_fct <- relevel(
  factor(as.character(data$adult_household_historic_turnout_fct)),
  ref = "low"
)

# Re-estimate all models on the recoded data
m_1_b <- lm(participate ~ schultyp_new + age_at_vote + gender + migrationBB + zivilstand + adult_household_historic_turnout,
          data = data)
m_2 <- lm(participate ~ schultyp_new*gender + age_at_vote + migrationBB + zivilstand + adult_household_historic_turnout,
          data = data)
m_3 <- lm(participate ~ schultyp_new*migrationBB + age_at_vote + gender + zivilstand + adult_household_historic_turnout,
          data = data)
m_4 <- lm(participate ~ schultyp_new*adult_household_historic_turnout_fct + gender + age_at_vote + migrationBB + zivilstand,
          data = data)
m_6 <- lm(participate ~ highest_value*schultyp_new + mean_stimmbet_perGemeindeWE + age_at_vote + gender + migrationBB + zivilstand + adult_household_historic_turnout,
          data = data)

# Row labels and row order shared by the text and the LaTeX table. Defining
# them once keeps both outputs in sync. The labels must stay in the order
# produced by `regression_table_order`.
regression_table_labels <- c(
  "High School/Voc. School with Baccalaureate (ref: Voc. School)",
  "High School/Voc. School with Baccalaureate and Female",
  "High School/Voc. School with Baccalaureate and Naturalized",  # label typo "Naturalizes" corrected
  "High School/Voc. School with Baccalaureate and High Household Turnout",
  "High School/Voc. School with Baccalaureate and Middle Household Turnout",
  "High School/Voc. School with Baccalaureate and Saliency",
  "Female",
  "Naturalized",
  "High Household Turnout",
  "Middle Household Turnout",
  "Saliency",
  "Age",
  "Marital Status: Divorced (ref: single)",
  "Marital Status: Married (ref: single)",
  "Household Turnout (numeric)",
  "Turnout per Municipality"
)
regression_table_order <- c(
  "schultyp_new",
  "gender",
  "migrationBB",
  "adult_household_historic_turnout_fct",
  "highest_value",
  "age_at_vote",
  "zivilstand",
  "adult_household_historic_turnout",
  "mean_stimmbet_perGemeindeWE"
)

# Plain-text version, shown in the rendered document
stargazer::stargazer(m_1_b, m_2, m_3, m_4, m_6,
                     title = "Regression Results",
                     align = TRUE,
                     dep.var.labels = "Political Participation",
                     no.space = TRUE,
                     type = "text",
                     covariate.labels = regression_table_labels,
                     order = regression_table_order)

# LaTeX version, additionally written to disk
stargazer::stargazer(m_1_b, m_2, m_3, m_4, m_6,
                     title = "Regression Results",
                     align = TRUE,
                     dep.var.labels = "Political Participation",
                     no.space = TRUE,
                     type = "latex",
                     covariate.labels = regression_table_labels,
                     order = regression_table_order,
                     out = "Figures/final/Regressiontable.tex")
```


